* Session setup: start with no data in memory, never pause output, require
* full variable names, suppress graph display, and use wide log lines.
clear
set more off
set varabbrev off
set graphics off
set linesize 255

/* INPUTS */
* One cleaned IAF payment file per year range (see the file names).

local in_data_1999 "/data/rawdata/cleaned/iaf_1999_2000"
local in_data_2001 "/data/rawdata/cleaned/iaf_2001_2002"
local in_data_2003 "/data/rawdata/cleaned/iaf_2003_2004"
local in_data_2005 "/data/rawdata/cleaned/iaf_2005_2006"
local in_data_2007 "/data/rawdata/cleaned/iaf_2007_2009"
local in_data_2010 "/data/rawdata/cleaned/iaf_2010_2012"
local in_data_2013 "/data/rawdata/cleaned/iaf_2013_2016"

/* OUTPUTS */
* out_iaf  : stacked payment file (all input files appended)
* out_iaf2 : final person-by-month file

local out_iaf  "/data2/cedsei/data2/iaf_data"
local out_iaf2 "/data2/cedsei/data2/IAF_cleaned_v4"

*cd "/data2/cedsei/data2"

****************************************************************************************************

* Rebuild switch: set to 1 to re-append the raw files and overwrite the
* stacked file; leave at 0 to reuse the stacked file already on disk.
local load = 0
if `load' == 1 {
    *** Read in all payment data sets, oldest first
    * The destring calls sit between specific appends on purpose; with the
    * force option, values that are not numeric become missing.
    * NOTE(review): presumably each variable is stored as string in the files
    * appended so far and as numeric in the later ones - confirm.
    use "`in_data_1999'"
    append using "`in_data_2001'"
    destring dagar kvarvarandedagar, replace force

    append using "`in_data_2003'"
    destring dagpeng, replace force

    append using "`in_data_2005'"
    append using "`in_data_2007'"
    destring ersgrunddaglon, replace force

    append using "`in_data_2010'"
    append using "`in_data_2013'"

    save "`out_iaf'", replace
}

* Load the stacked payment file, replacing whatever is in memory.
use "`out_iaf'", clear

/*
*** Quick look at data
sum
tabmiss
*/

* Years covered in the data: kassakortsvecka is divided by 100 and floored
* to get the year, which is tabulated (including missing) and then dropped.
generate temp = floor(kassakortsvecka/100)
tabulate temp, missing
drop temp

*** Rename some key variables to English names
rename (dagar dagpeng ersgrunddaglon) (days_per_week daily_benefit daily_wage)

*** Data restrictions
* Keep only unemployment benefit spells (erskod 1; keeps 98% of data), cf.
* https://www.iaf.se/globalassets/statistik/databaserna/astatdokumentation.pdf#page=137
keep if erskod == 1

* Drop payments by benefit level code (ersnivakod), same documentation page:
*    0, 2 : only flat rate benefits ("grundersättning")
*    6    : unemployment benefits received while also being on pension
*    20   : temporary payments for public work (basic amount and UI members)
drop if inlist(ersnivakod, 0, 2, 6, 20)

* Drop if number of payment days, daily wage, or daily benefit is missing
drop if missing(days_per_week, daily_benefit, daily_wage)

* Drop if daily wage or daily benefit is equal to zero
drop if daily_wage == 0 | daily_benefit == 0

*** Collapse data at (person ID * payment week) level

* co numbers the rows within each person-week cell; its tabulation shows the
* number of observations by person ID and payment week.
capture drop co
hashsort lopnr kassakortsvecka
by lopnr kassakortsvecka: generate co = _n
tabulate co

* Amount paid per row. Days per week is sometimes negative, so a row can
* carry a negative payment.
generate weekly_payment = days_per_week * daily_benefit

* Sort on the row counter as well before collapsing.
* NOTE(review): presumably this makes the (firstnm) picks reproducible - confirm.
hashsort lopnr kassakortsvecka co

gcollapse                                               ///
    (firstnm) erssnr erskod erdagarkod                  ///
    (sum)     weekly_payment days_per_week              ///
    (max)     daily_wage                                ///
              max_kvar          = kvarvarandedagar      ///
              max_ersnivkod     = ersnivakod            ///
    (min)     min_kvar          = kvarvarandedagar      ///
              min_ersnivkod     = ersnivakod            ///
              min_daily_benefit = daily_benefit         ///
    , by(lopnr kassakortsvecka)


*** Cumulative payment days used, as a running total over payment weeks
* NOTE(review): this section was originally described as cumulating "during a
* given benefit spell", but the running sum is taken over all of a person's
* weeks (grouped by lopnr only). Confirm whether it should restart for each
* spell.
hashsort lopnr kassakortsvecka
by lopnr (kassakortsvecka): generate payment_days_cumul = sum(days_per_week)

* 1 while the running total is at most 100 days, 0 once it exceeds 100
generate first100days = (payment_days_cumul <= 100) if !missing(payment_days_cumul)

*** Create a normalized daily wage variable (i.e. "daily wage – benefit cap"):
*** the daily wage minus (benefit cap / replacement rate), which is the wage
*** level at which the cap starts to bind.
*** Cf. https://www.iaf.se/globalassets/statistik/information-om-uppgifterna-i-statistikdatabasen2.pdf#page=4

* Week codes (YYYYWW) of the dates at which the rules change.
* NOTE(review): week() counts 7-day blocks starting on 1 January, which is not
* ISO week numbering. Confirm that this matches the week convention used in
* kassakortsvecka; some thresholds may be off by one week.
foreach date in 02jul2001 01jul2002 01jan2007 03mar2007 13sep2015 {
  local   week_of_`date' = year(d(`date'))*100 + week(d(`date')) // Integers of the format "YYYYWW"
}

gen       daily_wage_normalized = .

* Between 1997-12-29 and 2001-07-01, replacement rate was 80% and benefit cap was 580 SEK
replace   daily_wage_normalized = daily_wage - 580/0.8  ///
  if kassakortsvecka < `week_of_02jul2001'

* Between 2001-07-02 and 2002-06-30, replacement rate was 80% and benefit cap was 680 SEK
*   for the first 100 days and then 580 SEK
replace   daily_wage_normalized = daily_wage - 680/0.8  ///
  if kassakortsvecka >= `week_of_02jul2001' & kassakortsvecka < `week_of_01jul2002' & first100days == 1
replace   daily_wage_normalized = daily_wage - 580/0.8  ///
  if kassakortsvecka >= `week_of_02jul2001' & kassakortsvecka < `week_of_01jul2002' & first100days == 0

* Between 2002-07-01 and 2006-12-31, replacement rate was 80% and benefit cap was 730 SEK
*   for the first 100 days and then 680 SEK
replace   daily_wage_normalized = daily_wage - 730/0.8  ///
  if kassakortsvecka >= `week_of_01jul2002' & kassakortsvecka < `week_of_01jan2007' & first100days == 1
replace   daily_wage_normalized = daily_wage - 680/0.8  ///
  if kassakortsvecka >= `week_of_01jul2002' & kassakortsvecka < `week_of_01jan2007' & first100days == 0

* Between 2007-01-01 and 2007-03-04, replacement rate was 80% and benefit cap was 680 SEK
*   for every payment day, so there is no condition on first100days here
*   (conditioning on first100days == 0 would leave the first 100 days missing)
replace   daily_wage_normalized = daily_wage - 680/0.8  ///
  if kassakortsvecka >= `week_of_01jan2007' & kassakortsvecka < `week_of_03mar2007'

* From 2007-03-05 (and until 2015-09-07), replacement rate was either 65%, 70%, or 80%
*   (can see this directly from ersnivakod: 3 = 80%, 4 = 70%, 5 = 65%), and
*   benefit cap was 680 SEK
replace   daily_wage_normalized = daily_wage - 680/0.8  ///
  if kassakortsvecka >= `week_of_03mar2007' & kassakortsvecka < `week_of_13sep2015' & max_ersnivkod == 3
replace   daily_wage_normalized = daily_wage - 680/0.70 ///
  if kassakortsvecka >= `week_of_03mar2007' & kassakortsvecka < `week_of_13sep2015' & max_ersnivkod == 4
replace   daily_wage_normalized = daily_wage - 680/0.65 ///
  if kassakortsvecka >= `week_of_03mar2007' & kassakortsvecka < `week_of_13sep2015' & max_ersnivkod == 5

* From the week of 2015-09-07 onwards, same replacement rates by ersnivakod, with
*   caps of 910 SEK (code 3) and 760 SEK (codes 4 and 5). The !mi() guard is
*   needed because a missing week code compares as larger than any number.
* NOTE(review): 910 SEK is applied to every week with code 3. If the higher cap
*   only applies to the first 100 days, code-3 weeks after day 100 should
*   presumably use 760 SEK - confirm before changing.
replace   daily_wage_normalized = daily_wage - 910/0.8  ///
  if kassakortsvecka >= `week_of_13sep2015' & !mi(kassakortsvecka) & max_ersnivkod == 3
replace   daily_wage_normalized = daily_wage - 760/0.70 ///
  if kassakortsvecka >= `week_of_13sep2015' & !mi(kassakortsvecka) & max_ersnivkod == 4
replace   daily_wage_normalized = daily_wage - 760/0.65 ///
  if kassakortsvecka >= `week_of_13sep2015' & !mi(kassakortsvecka) & max_ersnivkod == 5

*** Prepare variables for the collapse at (person ID * month) level
gen       year              = floor(kassakortsvecka/100)
* Average daily benefit in the week: total paid divided by total days
* (missing when the week's days sum to zero)
gen       daily_benefit     = weekly_payment/days_per_week
gen       replacement_rate  = daily_benefit/daily_wage

* Week number = last two digits of the YYYYWW code
gen       week              = mod(kassakortsvecka, 100)
* yw() only accepts weeks 1 to 52, so week 53 is folded into week 52
replace   week              = 52 if week == 53

* Weekly date -> daily date of that week -> monthly date used as collapse key
gen       year_week         = yw(year, week)
gen       daily             = dofw(year_week)
gen       date              = ym(year, month(daily))

hashsort  lopnr  date

/*
gcollapse ///
(mean)    days_per_week daily_benefit daily_wage              ///
          replacement_rate daily_wage_normalized              ///
(max)     days_per_week_max           = days_per_week         ///
          daily_benefit_max           = daily_benefit         ///
          daily_wage_max              = daily_wage            ///
          replacement_rate_max        = replacement_rate      ///
          daily_wage_normalized_max   = daily_wage_normalized ///
	  first100days_max	      = first100days	      ///	
(min)     days_per_week_min           = days_per_week         ///
          daily_benefit_min           = daily_benefit         ///
          daily_wage_min              = daily_wage            ///
          replacement_rate_min        = replacement_rate      ///
          daily_wage_normalized_min   = daily_wage_normalized ///
	  first100days_min	      = first100days	      ///
(firstnm) days_per_week_first         = days_per_week         ///
          daily_benefit_first         = daily_benefit         ///
          daily_wage_first            = daily_wage            ///
          replacement_rate_first      = replacement_rate      ///
          daily_wage_normalized_first = daily_wage_normalized ///
	  first100days_first	      = first100days	      ///
(lastnm)  days_per_week_last          = days_per_week         ///
          daily_benefit_last          = daily_benefit         ///
          daily_wage_last             = daily_wage            ///
          replacement_rate_last       = replacement_rate      ///
          daily_wage_normalized_last  = daily_wage_normalized ///
	  first100days_last	      = first100days	      ///
, by(lopnr erssnr date)
*/

* Collapse to one row per person and month, then store the result.
*   mean : normalized daily wage and replacement rate
*   sum  : payment days (renamed to days below)
*   max  : largest normalized daily wage in the month (maxdaily)
*   min  : smallest first100days value in the month (first100days_min)
gcollapse                                               ///
    (mean) daily_wage_normalized replacement_rate       ///
    (sum)  days_per_week                                ///
    (max)  maxdaily         = daily_wage_normalized     ///
    (min)  first100days_min = first100days              ///
    , by(lopnr date)

rename days_per_week days
* date holds a monthly date, so display it in monthly format
format date %tm
compress
save "`out_iaf2'.dta", replace
